In [1]:
%matplotlib inline
from __future__ import print_function
import numpy as np
from six.moves import range
import keras.backend as K
from keras.models import Model, Sequential
from keras.engine.training import slice_X
from keras.layers import Lambda, Flatten, Permute, Reshape, Input
from keras.layers import merge, Merge, recurrent
from keras.layers import Activation, TimeDistributed, Dense, RepeatVector
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pylab as pl
import matplotlib.cm as cm
from scipy.special import expit


Using Theano backend.

1. Prepare Data


In [2]:
class CharacterTable(object):
    '''
    Given a set of characters:
    + Encode them to a one hot integer representation
    + Decode the one hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    '''
    def __init__(self, chars, maxlen):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
        self.maxlen = maxlen

    def encode(self, C, maxlen=None):
        maxlen = maxlen if maxlen else self.maxlen
        X = np.zeros((maxlen, len(self.chars)))
        for i, c in enumerate(C):
            X[i, self.char_indices[c]] = 1
        return X

    def decode(self, X, calc_argmax=True):
        if calc_argmax:
            X = X.argmax(axis=-1)
        return ''.join(self.indices_char[x] for x in X)

In [3]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

In [30]:
# Parameters for the model and dataset
TRAINING_SIZE = 100000
DIGITS = 5
OPS = 2
INVERT = True
# Try replacing GRU, or SimpleRNN
RNN = recurrent.LSTM
HIDDEN_SIZE = 16
BATCH_SIZE = 128
LAYERS = 1
MAXLEN = OPS * DIGITS + OPS - 1

In [31]:
chars = '0123456789+ '
ctable = CharacterTable(chars, MAXLEN)

In [32]:
def generate_data(training_size, num_digits, num_ops):
    questions = []
    expected = []
    seen = set()
    print('Generating data... ')
    while len(questions) < training_size:
#         f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, num_digits + 1))))
        f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(num_digits)))
        ops = []
        for i in range(num_ops):
            ops.append(f())
                    
        # Skip any addition questions we've already seen
        # Also skip any such that X+Y == Y+X (hence the sorting)
#         ops.sort()
        key = tuple(ops)
        if key in seen:
            continue
        seen.add(key)
        # Pad the data with spaces such that it is always MAXLEN
        ops_str = []
        format_str = '{:>' + str(num_digits) + '}'
        for op in ops:
            op_str = format_str.format(str(op))
            ops_str.append(op_str)
        
        q = '+'.join([str(op) for op in ops_str])
        query = q + ' ' * (MAXLEN - len(q))
        ans = str(sum(ops))
        # Answers can be of maximum size DIGITS + 1
        if INVERT:
            query = query[::-1]
            ans = ans[::-1]
        ans += ' ' * (num_digits + 1 - len(ans))
        questions.append(query)
        expected.append(ans)
#         print(len(questions))
    print('Total addition questions:', len(questions))
    
    return questions, expected

In [33]:
def create_train_valid(questions, expected, num_digits, num_ops, percentage):
    print('Vectorization...')
    X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
    y = np.zeros((len(questions), num_digits + 1, len(chars)), dtype=np.bool)
    for i, sentence in enumerate(questions):
        X[i] = ctable.encode(sentence, maxlen=MAXLEN)
    for i, sentence in enumerate(expected):
        y[i] = ctable.encode(sentence, maxlen=num_digits + 1)

    # Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
    indices = np.arange(len(y))
    np.random.shuffle(indices)
    X = X[indices]
    y = y[indices]

    # Explicitly set apart 10% for validation data that we never train over
    split_at = len(X) - len(X)*percentage
    (X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
    (y_train, y_val) = (y[:split_at], y[split_at:])

    print(X_train.shape)
    print(y_train.shape)
    
    return X_train, y_train, X_val, y_val

In [34]:
questions, expected = generate_data(TRAINING_SIZE, DIGITS, OPS)
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, DIGITS, OPS, 0.5)


Generating data... 
Total addition questions: 100000
Vectorization...
(50000, 11, 12)
(50000, 6, 12)
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:19: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

In [45]:
questions[0][::-1]


Out[45]:
'27068+48550'

In [36]:
expected[0]


Out[36]:
'81657 '

2. Modeling

2.1 Standard Encoder-decoder Model


In [41]:
def standard_seq2seq_model(hidden_size, num_layers, num_digits, num_ops):
    # Most simple seq2seq model using encoder-decoder framework
    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    encoder = RNN(hidden_size, input_shape=(MAXLEN, len(chars)))
    model.add(encoder)
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(num_digits + 1))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(num_layers):
        decoder = RNN(hidden_size, return_sequences=True)
        model.add(decoder)

    # For each of step of the output sequence, decide which character should be chosen
    mapper = TimeDistributed(Dense(len(chars)))
    model.add(mapper)
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    inputs = [K.learning_phase()] + model.inputs
    encoder_f = K.function(inputs, [encoder.output])
    decoder_f = K.function(inputs, [decoder.output])
    mapper_f = K.function(inputs, [mapper.output])
    
    return model, encoder_f, decoder_f, mapper_f, encoder, decoder, mapper

2.2 Attentional Encoder-decoder Model


In [43]:
import theano.tensor as T

def get_last_Y(X):
    return X[:, -1, :]

def get_Y(X, xmaxlen):
    return X[:, :xmaxlen, :]  # get first xmaxlen elem from time dim

def get_R(X):
    Y, alpha = X[0], X[1]
    ans = K.T.batched_dot(Y, alpha)
    return ans

def get_R_shape(input_shape):
    shape = list(input_shape)
    outshape = (shape[0][0],shape[0][1])
    return tuple(outshape)

def stack_decoder_input(X):
    ans = K.concatenate(X, axis=2)
    return ans

def stack_decoder_input_shape(input_shape):
    shape = list(input_shape)        
    outshape = (shape[0][0], len(shape), shape[0][2])
    return tuple(outshape)

def attentional_seq2seq_model(hidden_size, num_layers, num_digits, num_ops, chars):    
    main_input = Input(shape=(MAXLEN,len(chars)), name='main_input')
    
    encoder = RNN(hidden_size, 
                  input_shape=(MAXLEN, len(chars)),
                  return_sequences=True)(main_input)
    
    Y = Lambda(get_Y, arguments={"xmaxlen": MAXLEN}, name="Y", output_shape=(MAXLEN, hidden_size))(encoder)    
    Y_trans = Permute((2, 1), name="y_trans")(Y)  # of shape (None,300,20)
#     Input_trans = Permute((2, 1), name="input_trans")(main_input)

    r_array = []
    for idx in range(num_digits+1):
        WY = TimeDistributed(Dense(len(chars)), name="WY_"+str(idx))(Y)

        M = Activation('tanh', name="M_"+str(idx))(WY)
        alpha_ = TimeDistributed(Dense(1, activation='linear'), name="alpha_"+str(idx))(M)
        flat_alpha = Flatten(name="flat_alpha_"+str(idx))(alpha_)
        alpha = Dense(MAXLEN, activation='softmax', name="alpha"+str(idx))(flat_alpha)

        r_ = merge([Y_trans, alpha], output_shape=get_R_shape, name="r_"+str(idx), mode=get_R)
        r = Reshape((1,hidden_size))(r_)
        r_array.append(r)
        
    decoder_input = merge(r_array, mode=stack_decoder_input, output_shape=stack_decoder_input_shape)            
    decoded_result = RNN(hidden_size, input_shape=(num_digits+1, hidden_size), return_sequences=True)(decoder_input)
    mapping = TimeDistributed(Dense(len(chars)))(decoded_result)
    out = Activation('softmax')(mapping)
    
    model = Model(input=[main_input], output=out)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    inputs = [K.learning_phase()] + model.inputs
    a1 = model.get_layer('alpha1')
    a2 = model.get_layer('alpha2')
    a3 = model.get_layer('alpha3')
    alpha1_f = K.function(inputs, [a1.output])
    alpha2_f = K.function(inputs, [a2.output])
    alpha3_f = K.function(inputs, [a3.output])

    return model, alpha1_f, alpha2_f, alpha3_f, a1, a2, a3

3. Learning


In [38]:
def learning(model, X_train, y_train, iterations, X_val, y_val):
    y_true = []
    for idx in range(y_val.shape[0]):
        y_true.append(ctable.decode(y_val[idx]))

    training_obj = model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=iterations,
        validation_data=(X_val, y_val))

In [ ]:
std_model, encoder_f, decoder_f, mapper_f, encoder, decoder, mapper = standard_seq2seq_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS)
# val_acc_2_2 = learning(model, X_train, y_train, 100, X_val, y_val)
learning(std_model, X_train, y_train, 200, X_val, y_val)


Build model...

In [44]:
att_model, alpha1_f, alpha2_f, alpha3_f, aplha1, alpha2, alpha3 = attentional_seq2seq_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS, chars)
# val_acc_2_2 = learning(model, X_train, y_train, 100, X_val, y_val)
learning(att_model, X_train, y_train, 100, X_val, y_val)


Train on 50000 samples, validate on 50000 samples
Epoch 1/100
50000/50000 [==============================] - 15s - loss: 2.1826 - acc: 0.1842 - val_loss: 2.0421 - val_acc: 0.2266
Epoch 2/100
50000/50000 [==============================] - 16s - loss: 1.9922 - acc: 0.2359 - val_loss: 1.9565 - val_acc: 0.2461
Epoch 3/100
50000/50000 [==============================] - 20s - loss: 1.9441 - acc: 0.2519 - val_loss: 1.9334 - val_acc: 0.2569
Epoch 4/100
50000/50000 [==============================] - 16s - loss: 1.9275 - acc: 0.2568 - val_loss: 1.9174 - val_acc: 0.2618
Epoch 5/100
50000/50000 [==============================] - 16s - loss: 1.9071 - acc: 0.2627 - val_loss: 1.8873 - val_acc: 0.2733
Epoch 6/100
50000/50000 [==============================] - 22s - loss: 1.8634 - acc: 0.2857 - val_loss: 1.8375 - val_acc: 0.2975
Epoch 7/100
50000/50000 [==============================] - 16s - loss: 1.8186 - acc: 0.3061 - val_loss: 1.8049 - val_acc: 0.3152
Epoch 8/100
50000/50000 [==============================] - 16s - loss: 1.7903 - acc: 0.3226 - val_loss: 1.7786 - val_acc: 0.3276
Epoch 9/100
50000/50000 [==============================] - 16s - loss: 1.7706 - acc: 0.3344 - val_loss: 1.7770 - val_acc: 0.3343
Epoch 10/100
50000/50000 [==============================] - 16s - loss: 1.7550 - acc: 0.3425 - val_loss: 1.7472 - val_acc: 0.3470
Epoch 11/100
50000/50000 [==============================] - 21s - loss: 1.7416 - acc: 0.3481 - val_loss: 1.7420 - val_acc: 0.3316
Epoch 12/100
50000/50000 [==============================] - 17s - loss: 1.7316 - acc: 0.3513 - val_loss: 1.7299 - val_acc: 0.3420
Epoch 13/100
50000/50000 [==============================] - 17s - loss: 1.7216 - acc: 0.3560 - val_loss: 1.7208 - val_acc: 0.3515
Epoch 14/100
50000/50000 [==============================] - 18s - loss: 1.7145 - acc: 0.3581 - val_loss: 1.7163 - val_acc: 0.3552
Epoch 15/100
50000/50000 [==============================] - 17s - loss: 1.7076 - acc: 0.3605 - val_loss: 1.7030 - val_acc: 0.3650
Epoch 16/100
50000/50000 [==============================] - 19s - loss: 1.7007 - acc: 0.3630 - val_loss: 1.7022 - val_acc: 0.3628
Epoch 17/100
50000/50000 [==============================] - 19s - loss: 1.6960 - acc: 0.3635 - val_loss: 1.6941 - val_acc: 0.3692
Epoch 18/100
50000/50000 [==============================] - 14s - loss: 1.6901 - acc: 0.3663 - val_loss: 1.6891 - val_acc: 0.3659
Epoch 19/100
50000/50000 [==============================] - 14s - loss: 1.6859 - acc: 0.3674 - val_loss: 1.6841 - val_acc: 0.3663
Epoch 20/100
50000/50000 [==============================] - 13s - loss: 1.6810 - acc: 0.3687 - val_loss: 1.6803 - val_acc: 0.3642
Epoch 21/100
50000/50000 [==============================] - 14s - loss: 1.6767 - acc: 0.3700 - val_loss: 1.6765 - val_acc: 0.3684
Epoch 22/100
50000/50000 [==============================] - 14s - loss: 1.6726 - acc: 0.3713 - val_loss: 1.6692 - val_acc: 0.3756
Epoch 23/100
50000/50000 [==============================] - 13s - loss: 1.6687 - acc: 0.3724 - val_loss: 1.6733 - val_acc: 0.3650
Epoch 24/100
50000/50000 [==============================] - 17s - loss: 1.6656 - acc: 0.3720 - val_loss: 1.6664 - val_acc: 0.3711
Epoch 25/100
50000/50000 [==============================] - 19s - loss: 1.6617 - acc: 0.3741 - val_loss: 1.6619 - val_acc: 0.3713
Epoch 26/100
50000/50000 [==============================] - 19s - loss: 1.6585 - acc: 0.3749 - val_loss: 1.6725 - val_acc: 0.3614
Epoch 27/100
50000/50000 [==============================] - 18s - loss: 1.6563 - acc: 0.3748 - val_loss: 1.6538 - val_acc: 0.3795
Epoch 28/100
50000/50000 [==============================] - 16s - loss: 1.6524 - acc: 0.3760 - val_loss: 1.6560 - val_acc: 0.3757
Epoch 29/100
50000/50000 [==============================] - 16s - loss: 1.6505 - acc: 0.3761 - val_loss: 1.6474 - val_acc: 0.3796
Epoch 30/100
50000/50000 [==============================] - 16s - loss: 1.6466 - acc: 0.3779 - val_loss: 1.6440 - val_acc: 0.3813
Epoch 31/100
50000/50000 [==============================] - 16s - loss: 1.6451 - acc: 0.3777 - val_loss: 1.6418 - val_acc: 0.3798
Epoch 32/100
50000/50000 [==============================] - 16s - loss: 1.6423 - acc: 0.3782 - val_loss: 1.6452 - val_acc: 0.3771
Epoch 33/100
50000/50000 [==============================] - 17s - loss: 1.6383 - acc: 0.3803 - val_loss: 1.6369 - val_acc: 0.3809
Epoch 34/100
50000/50000 [==============================] - 17s - loss: 1.6369 - acc: 0.3798 - val_loss: 1.6441 - val_acc: 0.3729
Epoch 35/100
50000/50000 [==============================] - 17s - loss: 1.6344 - acc: 0.3804 - val_loss: 1.6326 - val_acc: 0.3813
Epoch 36/100
50000/50000 [==============================] - 17s - loss: 1.6323 - acc: 0.3808 - val_loss: 1.6291 - val_acc: 0.3834
Epoch 37/100
50000/50000 [==============================] - 17s - loss: 1.6288 - acc: 0.3825 - val_loss: 1.6269 - val_acc: 0.3829
Epoch 38/100
50000/50000 [==============================] - 16s - loss: 1.6261 - acc: 0.3829 - val_loss: 1.6351 - val_acc: 0.3737
Epoch 39/100
50000/50000 [==============================] - 16s - loss: 1.6233 - acc: 0.3839 - val_loss: 1.6241 - val_acc: 0.3810
Epoch 40/100
50000/50000 [==============================] - 16s - loss: 1.6224 - acc: 0.3832 - val_loss: 1.6198 - val_acc: 0.3849
Epoch 41/100
50000/50000 [==============================] - 17s - loss: 1.6193 - acc: 0.3849 - val_loss: 1.6188 - val_acc: 0.3838
Epoch 42/100
50000/50000 [==============================] - 17s - loss: 1.6176 - acc: 0.3858 - val_loss: 1.6169 - val_acc: 0.3832
Epoch 43/100
50000/50000 [==============================] - 18s - loss: 1.6155 - acc: 0.3865 - val_loss: 1.6145 - val_acc: 0.3862
Epoch 44/100
50000/50000 [==============================] - 17s - loss: 1.6136 - acc: 0.3864 - val_loss: 1.6124 - val_acc: 0.3853
Epoch 45/100
50000/50000 [==============================] - 17s - loss: 1.6135 - acc: 0.3868 - val_loss: 1.6119 - val_acc: 0.3871
Epoch 46/100
50000/50000 [==============================] - 17s - loss: 1.6097 - acc: 0.3886 - val_loss: 1.6079 - val_acc: 0.3900
Epoch 47/100
50000/50000 [==============================] - 21s - loss: 1.6091 - acc: 0.3877 - val_loss: 1.6083 - val_acc: 0.3845
Epoch 48/100
50000/50000 [==============================] - 20s - loss: 1.6062 - acc: 0.3891 - val_loss: 1.6060 - val_acc: 0.3894
Epoch 49/100
50000/50000 [==============================] - 17s - loss: 1.6048 - acc: 0.3895 - val_loss: 1.6047 - val_acc: 0.3896
Epoch 50/100
50000/50000 [==============================] - 17s - loss: 1.6038 - acc: 0.3902 - val_loss: 1.6025 - val_acc: 0.3899
Epoch 51/100
50000/50000 [==============================] - 17s - loss: 1.6023 - acc: 0.3898 - val_loss: 1.6005 - val_acc: 0.3911
Epoch 52/100
50000/50000 [==============================] - 17s - loss: 1.6021 - acc: 0.3890 - val_loss: 1.6056 - val_acc: 0.3903
Epoch 53/100
50000/50000 [==============================] - 16s - loss: 1.5991 - acc: 0.3917 - val_loss: 1.5990 - val_acc: 0.3918
Epoch 54/100
50000/50000 [==============================] - 17s - loss: 1.5977 - acc: 0.3911 - val_loss: 1.6039 - val_acc: 0.3844
Epoch 55/100
50000/50000 [==============================] - 17s - loss: 1.5974 - acc: 0.3910 - val_loss: 1.5961 - val_acc: 0.3930
Epoch 56/100
50000/50000 [==============================] - 17s - loss: 1.5959 - acc: 0.3915 - val_loss: 1.6135 - val_acc: 0.3802
Epoch 57/100
50000/50000 [==============================] - 17s - loss: 1.5946 - acc: 0.3926 - val_loss: 1.5944 - val_acc: 0.3922
Epoch 58/100
50000/50000 [==============================] - 17s - loss: 1.5938 - acc: 0.3924 - val_loss: 1.5923 - val_acc: 0.3948
Epoch 59/100
50000/50000 [==============================] - 17s - loss: 1.5924 - acc: 0.3930 - val_loss: 1.6022 - val_acc: 0.3839
Epoch 60/100
50000/50000 [==============================] - 17s - loss: 1.5926 - acc: 0.3918 - val_loss: 1.5907 - val_acc: 0.3940
Epoch 61/100
50000/50000 [==============================] - 19s - loss: 1.5920 - acc: 0.3917 - val_loss: 1.5910 - val_acc: 0.3913
Epoch 62/100
50000/50000 [==============================] - 16s - loss: 1.5899 - acc: 0.3926 - val_loss: 1.5907 - val_acc: 0.3925
Epoch 63/100
50000/50000 [==============================] - 20s - loss: 1.5885 - acc: 0.3937 - val_loss: 1.5944 - val_acc: 0.3884
Epoch 64/100
50000/50000 [==============================] - 14s - loss: 1.5885 - acc: 0.3930 - val_loss: 1.5918 - val_acc: 0.3904
Epoch 65/100
50000/50000 [==============================] - 14s - loss: 1.5876 - acc: 0.3932 - val_loss: 1.5854 - val_acc: 0.3961
Epoch 66/100
50000/50000 [==============================] - 14s - loss: 1.5854 - acc: 0.3948 - val_loss: 1.6019 - val_acc: 0.3811
Epoch 67/100
50000/50000 [==============================] - 15s - loss: 1.5859 - acc: 0.3938 - val_loss: 1.5884 - val_acc: 0.3905
Epoch 68/100
50000/50000 [==============================] - 16s - loss: 1.5835 - acc: 0.3943 - val_loss: 1.5825 - val_acc: 0.3965
Epoch 69/100
50000/50000 [==============================] - 14s - loss: 1.5837 - acc: 0.3946 - val_loss: 1.5865 - val_acc: 0.3937
Epoch 70/100
50000/50000 [==============================] - 14s - loss: 1.5828 - acc: 0.3943 - val_loss: 1.5803 - val_acc: 0.3965
Epoch 71/100
50000/50000 [==============================] - 15s - loss: 1.5814 - acc: 0.3946 - val_loss: 1.5814 - val_acc: 0.3956
Epoch 72/100
50000/50000 [==============================] - 17s - loss: 1.5809 - acc: 0.3943 - val_loss: 1.5828 - val_acc: 0.3940
Epoch 73/100
50000/50000 [==============================] - 15s - loss: 1.5803 - acc: 0.3944 - val_loss: 1.5845 - val_acc: 0.3914
Epoch 74/100
50000/50000 [==============================] - 16s - loss: 1.5784 - acc: 0.3954 - val_loss: 1.5809 - val_acc: 0.3941
Epoch 75/100
50000/50000 [==============================] - 17s - loss: 1.5772 - acc: 0.3953 - val_loss: 1.5745 - val_acc: 0.3962
Epoch 76/100
50000/50000 [==============================] - 16s - loss: 1.5741 - acc: 0.3957 - val_loss: 1.5729 - val_acc: 0.3958
Epoch 77/100
50000/50000 [==============================] - 17s - loss: 1.5714 - acc: 0.3953 - val_loss: 1.5707 - val_acc: 0.3940
Epoch 78/100
50000/50000 [==============================] - 15s - loss: 1.5679 - acc: 0.3964 - val_loss: 1.5644 - val_acc: 0.3992
Epoch 79/100
50000/50000 [==============================] - 15s - loss: 1.5658 - acc: 0.3968 - val_loss: 1.5633 - val_acc: 0.3979
Epoch 80/100
50000/50000 [==============================] - 14s - loss: 1.5602 - acc: 0.3992 - val_loss: 1.5614 - val_acc: 0.3973
Epoch 81/100
50000/50000 [==============================] - 13s - loss: 1.5596 - acc: 0.3983 - val_loss: 1.5634 - val_acc: 0.3926
Epoch 82/100
50000/50000 [==============================] - 13s - loss: 1.5539 - acc: 0.3997 - val_loss: 1.5580 - val_acc: 0.3963
Epoch 83/100
50000/50000 [==============================] - 13s - loss: 1.5517 - acc: 0.3996 - val_loss: 1.5526 - val_acc: 0.3980
Epoch 84/100
50000/50000 [==============================] - 13s - loss: 1.5471 - acc: 0.4016 - val_loss: 1.5454 - val_acc: 0.4012
Epoch 85/100
50000/50000 [==============================] - 14s - loss: 1.5438 - acc: 0.4009 - val_loss: 1.5446 - val_acc: 0.3997
Epoch 86/100
50000/50000 [==============================] - 15s - loss: 1.5385 - acc: 0.4025 - val_loss: 1.5375 - val_acc: 0.4030
Epoch 87/100
50000/50000 [==============================] - 16s - loss: 1.5327 - acc: 0.4039 - val_loss: 1.5300 - val_acc: 0.4025
Epoch 88/100
50000/50000 [==============================] - 14s - loss: 1.5248 - acc: 0.4054 - val_loss: 1.5222 - val_acc: 0.4061
Epoch 89/100
50000/50000 [==============================] - 16s - loss: 1.5196 - acc: 0.4066 - val_loss: 1.5185 - val_acc: 0.4043
Epoch 90/100
50000/50000 [==============================] - 16s - loss: 1.5111 - acc: 0.4089 - val_loss: 1.5083 - val_acc: 0.4097
Epoch 91/100
50000/50000 [==============================] - 18s - loss: 1.5047 - acc: 0.4098 - val_loss: 1.5026 - val_acc: 0.4102
Epoch 92/100
50000/50000 [==============================] - 19s - loss: 1.4975 - acc: 0.4127 - val_loss: 1.4940 - val_acc: 0.4131
Epoch 93/100
50000/50000 [==============================] - 19s - loss: 1.4911 - acc: 0.4144 - val_loss: 1.4881 - val_acc: 0.4156
Epoch 94/100
50000/50000 [==============================] - 17s - loss: 1.4852 - acc: 0.4169 - val_loss: 1.4826 - val_acc: 0.4178
Epoch 95/100
50000/50000 [==============================] - 20s - loss: 1.4807 - acc: 0.4178 - val_loss: 1.4814 - val_acc: 0.4184
Epoch 96/100
50000/50000 [==============================] - 18s - loss: 1.4767 - acc: 0.4196 - val_loss: 1.4764 - val_acc: 0.4207
Epoch 97/100
50000/50000 [==============================] - 18s - loss: 1.4711 - acc: 0.4221 - val_loss: 1.4698 - val_acc: 0.4239
Epoch 98/100
50000/50000 [==============================] - 17s - loss: 1.4670 - acc: 0.4240 - val_loss: 1.4637 - val_acc: 0.4250
Epoch 99/100
50000/50000 [==============================] - 15s - loss: 1.4633 - acc: 0.4258 - val_loss: 1.4625 - val_acc: 0.4263
Epoch 100/100
50000/50000 [==============================] - 14s - loss: 1.4600 - acc: 0.4266 - val_loss: 1.4582 - val_acc: 0.4285

4. Model Analysis


In [28]:
X_str = '13+21'
X_str = X_str[::-1]
print(X_str)
X = ctable.encode(X_str, maxlen=MAXLEN).reshape([1,5,12])
preds = std_model.predict(X, verbose=0)
y_hat = preds[0].argmax(axis=-1)
y_str = ''.join(ctable.indices_char[x] for x in y_hat)# ctable.indices_char[x]
print(y_str)
preds2 = att_model.predict(X, verbose=0)
y_hat2 = preds2[0].argmax(axis=-1)
y_str2 = ''.join(ctable.indices_char[x] for x in y_hat2)# ctable.indices_char[x]
print(y_str2)


12+31
43 

In [ ]: